<html>
 <head>
  <meta charset="utf-8"/>
  <!-- Responsive viewport. Zooming is deliberately left enabled: locking the
       scale (maximum-scale=1 / user-scalable=no) prevents low-vision users
       from enlarging the text. -->
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <title>
   刘 成昊的回复  | 数螺 | NAUT IDEA
  </title>
  <!-- Bootstrap core first, optional theme second, so that the theme's
       overrides take precedence over the core rules of equal specificity. -->
  <link href="http://cdn.bootcss.com/bootstrap/3.3.6/css/bootstrap.min.css" rel="stylesheet"/>
  <link href="http://cdn.bootcss.com/bootstrap/3.3.6/css/bootstrap-theme.min.css" rel="stylesheet"/>
  <style type="text/css">
   /* ---- Article area (#xmain) ---- */

   /* Images never overflow the column and get vertical breathing room. */
   #xmain img {
     display: block;
     max-width: 100%;
     margin-top: 10px;
     margin-bottom: 10px;
   }

   /* Body paragraphs. */
   #xmain p {
     font-size: 16px;
     line-height: 150%;
     margin-top: 20px;
   }

   /* Heading scale inside the article area. */
   #xmain h2 {
     font-size: 24px;
   }

   #xmain h3 {
     font-size: 20px;
   }

   #xmain h4 {
     font-size: 18px;
   }

   /* ---- Site banner (.header) ---- */

   /* Blue bar with white text, separated from the content below. */
   .header {
     color: #ffffff;
     background-color: #0099ff;
     margin-bottom: 20px;
   }

   /* Banner text sits inline and is vertically centred. */
   .header p {
     display: inline-block;
     vertical-align: middle;
     font-size: 16px;
     margin: 0px;
     padding: 10px 0;
   }

   /* Links in the banner keep the white foreground. */
   .header a {
     color: white;
   }

   /* Logo height. */
   .header img {
     height: 25px;
   }
  </style>
  <!-- jQuery 3.0.0: supplies the $(document).ready() hook used by the inline script that follows these includes. -->
  <script src="http://cdn.bootcss.com/jquery/3.0.0/jquery.min.js">
  </script>
  <!-- MathJax loaded with the TeX-AMS-MML_HTMLorMML combined configuration.
       The inline body passes a Hub config listing "bbpress-forums" under "elements".
       NOTE(review): browsers do not execute the body of a script element that has
       a src attribute; presumably MathJax reads this body itself as in-line
       configuration. Confirm against the MathJax version actually served. -->
  <script src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML" type="text/javascript">
   MathJax.Hub.Config({elements: ["bbpress-forums"]});
  </script>
  <!-- Readability build; the inline script that follows calls its Readability constructor.
       NOTE(review): the file is served from a "static/css/" path although it is JavaScript. Confirm this is intentional. -->
  <script src="http://nautstatic-10007657.file.myqcloud.com/static/css/readability.min.js" type="text/javascript">
  </script>
  <script type="text/javascript">
   // Once the DOM is ready, run Readability over a clone of this document and
   // replace the contents of #xmain with the extracted article markup.
   $(document).ready(function() {
     // URI descriptor handed to Readability; it describes the page this
     // document was captured from (cos.name) rather than the current location.
     var uri = {
       spec: "http://cos.name/cn/profile/369906/replies/",
       host: "http://cos.name",
       prePath: "http://cos.name",
       scheme: "http",
       pathBase: "http://cos.name/"
     };

     // Readability is given a deep clone so the live DOM is only touched by
     // the single innerHTML assignment below.
     var documentClone = document.cloneNode(true);
     var article = new Readability(uri, documentClone).parse();

     // If extraction yields nothing, keep the original markup in place instead
     // of throwing a TypeError on a missing result.
     if (!article || !article.content) {
       return;
     }

     document.getElementById("xmain").innerHTML = article.content;
   });
  </script>
  <!-- 1466437785: Accept with keywords: (title(0.0):论坛, topn(0.366666666667):RSS订阅,菜鸟,复杂度,老帖,帖子,R语言,作者,会员,数据挖掘,聚类分析,用户名,主站,普通,参数,中心点,数据,收藏夹,函数,python,论坛,问题,密码,总计,拟合,版主,主题,算法,聚类,出树,讨论区).-->
 </head>
 <body class="bbp-user-page single singular bbpress single-author sidebar" onload="">
  <!-- Site banner: logo and site name (both linking to /databee) on the left,
       tagline on the right. The tagline column is hidden on extra-small screens. -->
  <div class="header">
   <div class="container">
    <div class="row">
     <div class="col-xs-6 col-sm-6 text-left">
      <a href="/databee">
       <!-- alt gives the image-only link an accessible name. -->
       <img alt="数螺" src="http://nautidea-10007657.cos.myqcloud.com/logo_white.png"/>
      </a>
      <a href="/databee">
       <p>
        数螺
       </p>
      </a>
     </div>
     <div class="hidden-xs col-sm-6 text-right">
      <p>
       致力于数据科学的推广和知识传播
      </p>
     </div>
    </div>
   </div>
  </div>
  <!-- Centred page heading, placed between the site banner and the #xmain container. -->
  <div class="container text-center">
   <h1>
    刘 成昊的回复
   </h1>
  </div>
  <div class="container" id="xmain">
   <div class="hfeed site" id="page">
    <header class="site-header" id="masthead" role="banner">
     <div id="cos-logo">
      <a href="http://cos.name/cn">
       <img src="http://cos.name/cn/wp-content/themes/COS-forest/images/headers/cos-logo.png"/>
      </a>
     </div>
     <div class="navbar" id="navbar">
      <nav class="navigation main-navigation" id="site-navigation" role="navigation">
       <h3 class="menu-toggle">
        菜单
       </h3>
       <div class="menu-%e8%8f%9c%e5%8d%951-container">
        <ul class="nav-menu" id="menu-%e8%8f%9c%e5%8d%951">
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-home menu-item-407772" id="menu-item-407772">
          <a href="http://cos.name/cn/">
           论坛首页
          </a>
         </li>
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-407773" id="menu-item-407773">
          <a href="http://cos.name/cn/forums/">
           讨论区
          </a>
         </li>
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-407774" id="menu-item-407774">
          <a href="http://cos.name/cn/wp-login.php?action=register">
           注册
          </a>
         </li>
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-407819" id="menu-item-407819">
          <a href="http://cos.name/">
           主站
          </a>
         </li>
        </ul>
       </div>
      </nav>
      <!-- #site-navigation -->
     </div>
     <!-- #navbar -->
    </header>
    <!-- #masthead -->
    <div class="site-main" id="main">
     <div class="content-area" id="primary">
      <div class="site-content" id="content" role="main">
       <article class="post-0 type- status-publish hentry" id="post-0">
        <header class="entry-header">
         <h1 class="entry-title">
          刘 成昊
         </h1>
        </header>
        <!-- .entry-header -->
        <div class="entry-content">
         <div id="bbpress-forums">
          <div id="bbp-user-wrapper">
           <div id="bbp-single-user-details">
            <div id="bbp-user-avatar">
             <span class="vcard">
              <a class="url fn n" href="http://cos.name/cn/profile/369906/" rel="me" title="刘 成昊">
               <img src="http://sdn.geekzu.org/avatar/37d5b3199150afc2ff389bd4005a50fc?s=150&amp;d=monsterid&amp;r=g"/>
              </a>
             </span>
            </div>
            <!-- #author-avatar -->
            <div id="bbp-user-navigation">
             <ul>
              <li class="">
               <span class="vcard bbp-user-profile-link">
                <a class="url fn n" href="http://cos.name/cn/profile/369906/" rel="me" title="刘 成昊的档案">
                 档案
                </a>
               </span>
              </li>
              <li class="">
               <span class="bbp-user-topics-created-link">
                <a href="http://cos.name/cn/profile/369906/topics/" title="刘 成昊发起的主题">
                 发起的主题
                </a>
               </span>
              </li>
              <li class="current">
               <span class="bbp-user-replies-created-link">
                <a href="http://cos.name/cn/profile/369906/replies/" title="刘 成昊创建的回复">
                 创建的回复
                </a>
               </span>
              </li>
              <li class="">
               <span class="bbp-user-favorites-link">
                <a href="http://cos.name/cn/profile/369906/favorites/" title="刘 成昊的收藏夹">
                 收藏夹
                </a>
               </span>
              </li>
             </ul>
            </div>
            <!-- #bbp-user-navigation -->
           </div>
           <!-- #bbp-single-user-details -->
           <div id="bbp-user-body">
            <div class="bbp-user-replies-created" id="bbp-user-replies-created">
             <h2 class="entry-title">
              回复的主题
             </h2>
             <div class="bbp-user-section">
              <div class="bbp-pagination">
               <div class="bbp-pagination-count">
                查看 15 个帖子 - 1 到 15（总计 102 个）
               </div>
               <div class="bbp-pagination-links">
                <span class="page-numbers current">
                 1
                </span>
                <a class="page-numbers" href="http://cos.name/cn/profile/369906/replies/page/2/">
                 2
                </a>
                <span class="page-numbers dots">
                 …
                </span>
                <a class="page-numbers" href="http://cos.name/cn/profile/369906/replies/page/7/">
                 7
                </a>
                <a class="next page-numbers" href="http://cos.name/cn/profile/369906/replies/page/2/">
                 →
                </a>
               </div>
              </div>
              <ul class="forums bbp-replies" id="topic-0-replies">
               <li class="bbp-header">
                <div class="bbp-reply-author">
                 作者
                </div>
                <!-- .bbp-reply-author -->
                <div class="bbp-reply-content">
                 帖子
                </div>
                <!-- .bbp-reply-content -->
               </li>
               <!-- .bbp-header -->
               <li class="bbp-body">
                <div class="bbp-reply-header" id="post-410812">
                 <div class="bbp-meta">
                  <span class="bbp-reply-post-date">
                   2015年4月13日 下午3:34
                  </span>
                  <span class="bbp-header">
                   回复：
                   <a class="bbp-topic-permalink" href="http://cos.name/cn/topic/410787/">
                    有偿寻求合作
                   </a>
                  </span>
                  <a class="bbp-reply-permalink" href="http://cos.name/cn/topic/410787/#post-410812">
                   2 楼
                  </a>
                  <span class="bbp-admin-links">
                  </span>
                 </div>
                 <!-- .bbp-meta -->
                </div>
                <!-- #post-410812 -->
                <div class="odd bbp-parent-forum-1010 bbp-parent-topic-410787 bbp-reply-position-2 user-id-369906 post-410812 reply type-reply status-publish hentry">
                 <div class="bbp-reply-author">
                  <a class="bbp-author-avatar" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   <img src="http://sdn.geekzu.org/avatar/37d5b3199150afc2ff389bd4005a50fc?s=80&amp;d=monsterid&amp;r=g"/>
                  </a>
                  <br/>
                  <a class="bbp-author-name" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   刘 成昊
                  </a>
                  <br/>
                  <div class="bbp-author-role">
                   普通会员
                  </div>
                 </div>
                 <!-- .bbp-reply-author -->
                 <div class="bbp-reply-content">
                  <p>
                   Call for Software
                   <br/>
                   Neurocomputing Journal (Impact Factor=2.005)
                   <br/>
                   For our New Software Track using novel academic content standards for software developed by Elsevier and Github
                  </p>
                  <p>
                   We believe Software is scientific method executed by a machine, and therefore should be an integral part of the
                   <br/>
                   scientific communication ecosystem. Even though some journals have been experimenting, we feel software itself is
                   <br/>
                   currently not systematically treated as a full and equal academic citizen.
                  </p>
                  <p>
                   Elsevier intends to take open science to a next level and make software itself a fully-fledged academic publication by
                   <br/>
                   making code, pieces of software or complete tools as findable, indexable, archivable, searchable, citable, referable as
                   <br/>
                   the trusted papers we’re used to. To this end specific editorial support, version management of the software
                   <br/>
                   published, and a tailor made review process that extends to multiple software and code releases will be offered.
                  </p>
                  <p>
                   Introducing a new scientific format : ORIGINAL SOFTWARE PUBLICATIONS (OSP)
                  </p>
                  <p>
                   Elsevier is working with software development platforms like GitHub to create a new academic content class: Original
                   <br/>
                   Software Publications (OSP). These are short overviews that only describe major/significant software and code
                   <br/>
                   artefacts, including post publication updates (versions) and systematically capture all metadata needed to expose this
                   <br/>
                   work to interested readers and users. The OSP, as well as subsequent updates thereof, will be peer reviewed and
                   <br/>
                   considered “one body of work” for citation and indexing purposes, creating the canonical academic reference point for
                   <br/>
                   your body of software work including all its versions/ releases.
                  </p>
                  <p>
                   New research publications have always been founded on earlier work, and the same applies to software. What
                   <br/>
                   constitutes enough of a ‘body of work’ to be publishable as one OSP is decided by the editorial policies of journals
                   <br/>
                   supporting this new class of scientific contribution. For instance, bug fixes and many other post publication releases
                   <br/>
                   are not considered publishable “major new work”, and are incorporated into the initial OSP publication by means of
                   <br/>
                   submitting a software update.
                  </p>
                  <p>
                   Should the authors/ developers reach a significant major new release they may need to submit a new OSP for
                   <br/>
                   review- and this submission is treated as a new body of work. Obviously authors can cite their earlier (software) work,
                   <br/>
                   relevant theory, and other comparable works.
                  </p>
                  <p>
                   All software and code published is, and will remain, fully owned by their developers. Currently all software and code
                   <br/>
                   submitted for review and evaluation must be released under one of the pre-approved licenses:
                   <br/>
                   *             Apache License, 2.0 (Apache-2.0)
                   <br/>
                   *             BSD 3-Clause “New” or “Revised” license (BSD-3-Clause)
                   <br/>
                   *             BSD 2-Clause “Simplified” or “FreeBSD” license (BSD-2-Clause)
                   <br/>
                   *             GNU General Public License (GPL)
                   <br/>
                   *             GNU Library or “Lesser” General Public License (LGPL)
                   <br/>
                   *             MIT license (MIT)
                   <br/>
                   *             Mozilla Public License 2.0 (MPL-2.0)
                   <br/>
                   *             Common Development and Distribution License (CDDL-1.0)
                   <br/>
                   *             Eclipse Public License (EPL-1.0)
                   <br/>
                   *             Creative Commons Zero (CC0)
                  </p>
                  <p>
                   SUBMISSIONS
                   <br/>
                   There are two types of submissions for your OSP:
                   <br/>
                   1.            Submission Process of your FIRST OSP
                   <br/>
                   2.            Submission Process of your Software UPDATES to the OSP
                  </p>
                  <p>
                   More detailed submission instructions can be found at:
                   <br/>
                   <a href="http://www.elsevier.com/about/content-innovation/original-software-publications#instructions-how-to-submit-your-software-work" rel="nofollow">
                    http://www.elsevier.com/about/content-innovation/original-software-publications#instructions-how-to-submit-your-software-work
                   </a>
                  </p>
                  <p>
                   Your software and code will be fast tracked for review by the dedicated editorial board of Neurocomputing’s
                   <br/>
                   new software track. Experts will review your software based on: (i) the quality of the OSP manuscript, (ii) originality, novelty, and
                   <br/>
                   significance of the software contribution, (iii) the quality, completeness, and readability of the source codes, and (iv)
                   <br/>
                   the reproducibility of the empirical results, and (v) the accessibility of the software.
                  </p>
                  <p>
                   After review and acceptance, your software and/or code will be copied to the journal archive on GitHub and
                   <br/>
                   integrated with the online version of your OSP available on ScienceDirect. Your work will be abstracted and indexed,
                   <br/>
                   optimized for search, fully citable, archived, and fully counted as a scientific contribution.
                  </p>
                  <p>
                   If you have questions or wish to provide feedback on Original Software Publications please contact Elsevier support
                   <br/>
                   at
                   <a href="mailto:OSP@elsevier.com" rel="nofollow">
                    OSP@elsevier.com
                   </a>
                   .
                   <br/>
                   For GitHub support please email
                   <a href="mailto:science@github.com" rel="nofollow">
                    science@github.com
                   </a>
                   and kindly use “OSP” in your email header
                  </p>
                  <p>
                   ABOUT NEUROCOMPUTING JOURNAL
                   <br/>
                   Neurocomputing (Impact Factor: 2.005) publishes articles describing recent fundamental contributions in the field of
                   <br/>
                   neurocomputing, including theory, practice and applications.
                   <a href="http://www.journals.elsevier.com/neurocomputing/" rel="nofollow">
                    http://www.journals.elsevier.com/neurocomputing/
                   </a>
                  </p>
                  <p>
                   Associate Editor in Chief for Neurocomputing Software Track
                   <br/>
                   Professor Steven C.H. Hoi,
                   <br/>
                   School of Information Systems, Singapore Management University, Singapore
                   <br/>
                   E-mail:
                   <a href="mailto:chhoi@smu.edu.sg" rel="nofollow">
                    chhoi@smu.edu.sg
                   </a>
                   or
                   <a href="mailto:stevenhoi@gmail.com" rel="nofollow">
                    stevenhoi@gmail.com
                   </a>
                  </p>
                 </div>
                 <!-- .bbp-reply-content -->
                </div>
                <!-- .reply -->
                <div class="bbp-reply-header" id="post-402515">
                 <div class="bbp-meta">
                  <span class="bbp-reply-post-date">
                   2014年5月30日 上午6:13
                  </span>
                  <span class="bbp-header">
                   回复：
                   <a class="bbp-topic-permalink" href="http://cos.name/cn/topic/156320/">
                    【求助】问答系统相关研究与发展？
                   </a>
                  </span>
                  <a class="bbp-reply-permalink" href="http://cos.name/cn/topic/156320/#post-402515">
                   2 楼
                  </a>
                  <span class="bbp-admin-links">
                  </span>
                 </div>
                 <!-- .bbp-meta -->
                </div>
                <!-- #post-402515 -->
                <div class="even bbp-parent-forum-1006 bbp-parent-topic-156320 bbp-reply-position-2 user-id-369906 post-402515 reply type-reply status-publish hentry">
                 <div class="bbp-reply-author">
                  <a class="bbp-author-avatar" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   <img src="http://sdn.geekzu.org/avatar/37d5b3199150afc2ff389bd4005a50fc?s=80&amp;d=monsterid&amp;r=g"/>
                  </a>
                  <br/>
                  <a class="bbp-author-name" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   刘 成昊
                  </a>
                  <br/>
                  <div class="bbp-author-role">
                   普通会员
                  </div>
                 </div>
                 <!-- .bbp-reply-author -->
                 <div class="bbp-reply-content">
                  <p>
                   昨天奶茶mm 在微软推出的新产品 就是这个 附上新闻链接
                   <br/>
                   <a href="http://tech.it168.com/a2014/0529/1628/000001628736.shtml" rel="nofollow">
                    http://tech.it168.com/a2014/0529/1628/000001628736.shtml
                   </a>
                  </p>
                 </div>
                 <!-- .bbp-reply-content -->
                </div>
                <!-- .reply -->
                <div class="bbp-reply-header" id="post-345266">
                 <div class="bbp-meta">
                  <span class="bbp-reply-post-date">
                   2013年4月11日 上午7:56
                  </span>
                  <span class="bbp-header">
                   回复：
                   <a class="bbp-topic-permalink" href="http://cos.name/cn/topic/109784/">
                    有没有高维数据，非参数统计（样条法）或者R语言精通的同学。
                   </a>
                  </span>
                  <a class="bbp-reply-permalink" href="http://cos.name/cn/topic/109784/#post-345266">
                   9 楼
                  </a>
                  <span class="bbp-admin-links">
                  </span>
                 </div>
                 <!-- .bbp-meta -->
                </div>
                <!-- #post-345266 -->
                <div class="odd bbp-parent-forum-991 bbp-parent-topic-109784 bbp-reply-position-9 user-id-369906 post-345266 reply type-reply status-publish hentry">
                 <div class="bbp-reply-author">
                  <a class="bbp-author-avatar" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   <img src="http://sdn.geekzu.org/avatar/37d5b3199150afc2ff389bd4005a50fc?s=80&amp;d=monsterid&amp;r=g"/>
                  </a>
                  <br/>
                  <a class="bbp-author-name" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   刘 成昊
                  </a>
                  <br/>
                  <div class="bbp-author-role">
                   普通会员
                  </div>
                 </div>
                 <!-- .bbp-reply-author -->
                 <div class="bbp-reply-content">
                  <p>
                   Dirichlet Process。。。。。。
                  </p>
                 </div>
                 <!-- .bbp-reply-content -->
                </div>
                <!-- .reply -->
                <div class="bbp-reply-header" id="post-345265">
                 <div class="bbp-meta">
                  <span class="bbp-reply-post-date">
                   2013年4月11日 上午7:53
                  </span>
                  <span class="bbp-header">
                   回复：
                   <a class="bbp-topic-permalink" href="http://cos.name/cn/topic/109977/">
                    缺失数据没有识别为NA怎么办
                   </a>
                  </span>
                  <a class="bbp-reply-permalink" href="http://cos.name/cn/topic/109977/#post-345265">
                   2 楼
                  </a>
                  <span class="bbp-admin-links">
                  </span>
                 </div>
                 <!-- .bbp-meta -->
                </div>
                <!-- #post-345265 -->
                <div class="even bbp-parent-forum-999 bbp-parent-topic-109977 bbp-reply-position-2 user-id-369906 post-345265 reply type-reply status-publish hentry">
                 <div class="bbp-reply-author">
                  <a class="bbp-author-avatar" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   <img src="http://sdn.geekzu.org/avatar/37d5b3199150afc2ff389bd4005a50fc?s=80&amp;d=monsterid&amp;r=g"/>
                  </a>
                  <br/>
                  <a class="bbp-author-name" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   刘 成昊
                  </a>
                  <br/>
                  <div class="bbp-author-role">
                   普通会员
                  </div>
                 </div>
                 <!-- .bbp-reply-author -->
                 <div class="bbp-reply-content">
                  <p>
                   read.table 里的参数 na.strings 自己设一下
                  </p>
                 </div>
                 <!-- .bbp-reply-content -->
                </div>
                <!-- .reply -->
                <div class="bbp-reply-header" id="post-345264">
                 <div class="bbp-meta">
                  <span class="bbp-reply-post-date">
                   2013年4月11日 上午7:52
                  </span>
                  <span class="bbp-header">
                   回复：
                   <a class="bbp-topic-permalink" href="http://cos.name/cn/topic/109931/">
                    使用R对用户数据进行聚类分析遇到的问题和一些思考
                   </a>
                  </span>
                  <a class="bbp-reply-permalink" href="http://cos.name/cn/topic/109931/#post-345264">
                   8 楼
                  </a>
                  <span class="bbp-admin-links">
                  </span>
                 </div>
                 <!-- .bbp-meta -->
                </div>
                <!-- #post-345264 -->
                <div class="odd bbp-parent-forum-999 bbp-parent-topic-109931 bbp-reply-position-8 user-id-369906 post-345264 reply type-reply status-publish hentry">
                 <div class="bbp-reply-author">
                  <a class="bbp-author-avatar" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   <img src="http://sdn.geekzu.org/avatar/37d5b3199150afc2ff389bd4005a50fc?s=80&amp;d=monsterid&amp;r=g"/>
                  </a>
                  <br/>
                  <a class="bbp-author-name" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   刘 成昊
                  </a>
                  <br/>
                  <div class="bbp-author-role">
                   普通会员
                  </div>
                 </div>
                 <!-- .bbp-reply-author -->
                 <div class="bbp-reply-content">
                  <p>
                   1 数据都可以映射到numeric上
                   <br/>
                   2 20w的数据 真心不算大 对R来说却是略坑 有些Kmeans并行化的idea可以直接借用过来，切分聚成小的中心点，最后汇聚起来
                   <br/>
                   3 推荐用下python的sklearn，有个minibatch kmeans R好像没有online的聚类，每次都数据框一大坨加载进来
                   <br/>
                   4 某些情况提升聚类效果，PCA还是有用的
                   <br/>
                   5 因为要迭代 map/reduce很慢的，可以手写,kmeans不难，mahout也有现成的可以用,0.8版好像还加入了streamingKmeans BallKmeans等
                   <br/>
                   6 聚类中心点 最近的样本 有代表性 ，还有outlier的点
                  </p>
                 </div>
                 <!-- .bbp-reply-content -->
                </div>
                <!-- .reply -->
                <div class="bbp-reply-header" id="post-342965">
                 <div class="bbp-meta">
                  <span class="bbp-reply-post-date">
                   2013年2月28日 上午6:23
                  </span>
                  <span class="bbp-header">
                   回复：
                   <a class="bbp-topic-permalink" href="http://cos.name/cn/topic/109440/">
                    哪位能给统计学大一新生一点学习建议？菜鸟需要指导
                   </a>
                  </span>
                  <a class="bbp-reply-permalink" href="http://cos.name/cn/topic/109440/#post-342965">
                   13 楼
                  </a>
                  <span class="bbp-admin-links">
                  </span>
                 </div>
                 <!-- .bbp-meta -->
                </div>
                <!-- #post-342965 -->
                <div class="even bbp-parent-forum-990 bbp-parent-topic-109440 bbp-reply-position-13 user-id-369906 post-342965 reply type-reply status-publish hentry">
                 <div class="bbp-reply-author">
                  <a class="bbp-author-avatar" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   <img src="http://sdn.geekzu.org/avatar/37d5b3199150afc2ff389bd4005a50fc?s=80&amp;d=monsterid&amp;r=g"/>
                  </a>
                  <br/>
                  <a class="bbp-author-name" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   刘 成昊
                  </a>
                  <br/>
                  <div class="bbp-author-role">
                   普通会员
                  </div>
                 </div>
                 <!-- .bbp-reply-author -->
                 <div class="bbp-reply-content">
                  <p>
                   好好谈恋爱
                  </p>
                 </div>
                 <!-- .bbp-reply-content -->
                </div>
                <!-- .reply -->
                <div class="bbp-reply-header" id="post-339755">
                 <div class="bbp-meta">
                  <span class="bbp-reply-post-date">
                   2012年11月21日 上午8:35
                  </span>
                  <span class="bbp-header">
                   回复：
                   <a class="bbp-topic-permalink" href="http://cos.name/cn/topic/108641/">
                    在R中高维聚类的函数有哪些？
                   </a>
                  </span>
                  <a class="bbp-reply-permalink" href="http://cos.name/cn/topic/108641/#post-339755">
                   7 楼
                  </a>
                  <span class="bbp-admin-links">
                  </span>
                 </div>
                 <!-- .bbp-meta -->
                </div>
                <!-- #post-339755 -->
                <div class="odd bbp-parent-forum-999 bbp-parent-topic-108641 bbp-reply-position-7 user-id-369906 post-339755 reply type-reply status-publish hentry">
                 <div class="bbp-reply-author">
                  <a class="bbp-author-avatar" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   <img src="http://sdn.geekzu.org/avatar/37d5b3199150afc2ff389bd4005a50fc?s=80&amp;d=monsterid&amp;r=g"/>
                  </a>
                  <br/>
                  <a class="bbp-author-name" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   刘 成昊
                  </a>
                  <br/>
                  <div class="bbp-author-role">
                   普通会员
                  </div>
                 </div>
                 <!-- .bbp-reply-author -->
                 <div class="bbp-reply-content">
                  <p>
                   回复 第6楼 的 肖楠：可能更准确的表述是incremental learning，假设lz提到的是numeric类型，48字节，100万100维整个数据集大概4个G，加载到工作空间是可行的，R有一些从算法层面优化就像版主提到的，可是有些算法空间复杂度很高啊，像PCA，ridge regression时间也到了O(n^3)。采用incremental learning就不需要考虑这些问题，时间快 不会内存不足。R的data import好像也没stream的概念，可以借助数据库一行行读进来，然后一点点update model。
                  </p>
                 </div>
                 <!-- .bbp-reply-content -->
                </div>
                <!-- .reply -->
                <!-- Reply header for post 339677: post date, parent topic link and the reply permalink. -->
                <div class="bbp-reply-header" id="post-339677">
                 <div class="bbp-meta">
                  <span class="bbp-reply-post-date">
                   <!-- Machine-readable copy of the visible date; the page gives no time zone, so none is encoded. -->
                   <time datetime="2012-11-19T10:59">
                    2012年11月19日 上午10:59
                   </time>
                  </span>
                  <span class="bbp-header">
                   回复：
                   <a class="bbp-topic-permalink" href="http://cos.name/cn/topic/108629/">
                    求推荐有关数据挖掘相关的中文版书籍
                   </a>
                  </span>
                  <a class="bbp-reply-permalink" href="http://cos.name/cn/topic/108629/#post-339677">
                   6 楼
                  </a>
                  <span class="bbp-admin-links">
                  </span>
                 </div>
                 <!-- .bbp-meta -->
                </div>
                <!-- #post-339677 -->
                <!-- Reply body for post 339677: author column (avatar, name, role) followed by the message text. -->
                <div class="even bbp-parent-forum-999 bbp-parent-topic-108629 bbp-reply-position-6 user-id-369906 post-339677 reply type-reply status-publish hentry">
                 <div class="bbp-reply-author">
                  <a class="bbp-author-avatar" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   <!-- The image is the only content of this link, so it needs alt text to give the link a name. -->
                   <img alt="刘 成昊的头像" src="http://sdn.geekzu.org/avatar/37d5b3199150afc2ff389bd4005a50fc?s=80&amp;d=monsterid&amp;r=g"/>
                  </a>
                  <br/>
                  <a class="bbp-author-name" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   刘 成昊
                  </a>
                  <br/>
                  <div class="bbp-author-role">
                   普通会员
                  </div>
                 </div>
                 <!-- .bbp-reply-author -->
                 <div class="bbp-reply-content">
                  <p>
                   最近有本书 大数据：互联网大规模数据挖掘与分布式处理 讲的挺多的
                  </p>
                 </div>
                 <!-- .bbp-reply-content -->
                </div>
                <!-- .reply -->
                <!-- Reply header for post 339676: post date, parent topic link and the reply permalink. -->
                <div class="bbp-reply-header" id="post-339676">
                 <div class="bbp-meta">
                  <span class="bbp-reply-post-date">
                   <!-- Machine-readable copy of the visible date; the page gives no time zone, so none is encoded. -->
                   <time datetime="2012-11-19T10:58">
                    2012年11月19日 上午10:58
                   </time>
                  </span>
                  <span class="bbp-header">
                   回复：
                   <a class="bbp-topic-permalink" href="http://cos.name/cn/topic/106040/">
                    beanplot在展示、比较多个模型训练、测试误差好给力
                   </a>
                  </span>
                  <a class="bbp-reply-permalink" href="http://cos.name/cn/topic/106040/#post-339676">
                   6 楼
                  </a>
                  <span class="bbp-admin-links">
                  </span>
                 </div>
                 <!-- .bbp-meta -->
                </div>
                <!-- #post-339676 -->
                <!-- Reply body for post 339676: author column (avatar, name, role) followed by the message text. -->
                <div class="odd bbp-parent-forum-1011 bbp-parent-topic-106040 bbp-reply-position-6 user-id-369906 post-339676 reply type-reply status-publish hentry">
                 <div class="bbp-reply-author">
                  <a class="bbp-author-avatar" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   <!-- The image is the only content of this link, so it needs alt text to give the link a name. -->
                   <img alt="刘 成昊的头像" src="http://sdn.geekzu.org/avatar/37d5b3199150afc2ff389bd4005a50fc?s=80&amp;d=monsterid&amp;r=g"/>
                  </a>
                  <br/>
                  <a class="bbp-author-name" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   刘 成昊
                  </a>
                  <br/>
                  <div class="bbp-author-role">
                   普通会员
                  </div>
                 </div>
                 <!-- .bbp-reply-author -->
                 <div class="bbp-reply-content">
                  <p>
                   都说R画图厉害，不会呀，好着急
                  </p>
                 </div>
                 <!-- .bbp-reply-content -->
                </div>
                <!-- .reply -->
                <!-- Reply header for post 339674: post date, parent topic link and the reply permalink. -->
                <div class="bbp-reply-header" id="post-339674">
                 <div class="bbp-meta">
                  <span class="bbp-reply-post-date">
                   <!-- Machine-readable copy of the visible date; the page gives no time zone, so none is encoded. -->
                   <time datetime="2012-11-19T10:13">
                    2012年11月19日 上午10:13
                   </time>
                  </span>
                  <span class="bbp-header">
                   回复：
                   <a class="bbp-topic-permalink" href="http://cos.name/cn/topic/108641/">
                    在R中高维聚类的函数有哪些？
                   </a>
                  </span>
                  <a class="bbp-reply-permalink" href="http://cos.name/cn/topic/108641/#post-339674">
                   5 楼
                  </a>
                  <span class="bbp-admin-links">
                  </span>
                 </div>
                 <!-- .bbp-meta -->
                </div>
                <!-- #post-339674 -->
                <!-- Reply body for post 339674: author column (avatar, name, role) followed by the message text. -->
                <div class="even bbp-parent-forum-999 bbp-parent-topic-108641 bbp-reply-position-5 user-id-369906 post-339674 reply type-reply status-publish hentry">
                 <div class="bbp-reply-author">
                  <a class="bbp-author-avatar" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   <!-- The image is the only content of this link, so it needs alt text to give the link a name. -->
                   <img alt="刘 成昊的头像" src="http://sdn.geekzu.org/avatar/37d5b3199150afc2ff389bd4005a50fc?s=80&amp;d=monsterid&amp;r=g"/>
                  </a>
                  <br/>
                  <a class="bbp-author-name" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   刘 成昊
                  </a>
                  <br/>
                  <div class="bbp-author-role">
                   普通会员
                  </div>
                 </div>
                 <!-- .bbp-reply-author -->
                 <div class="bbp-reply-content">
                  <!-- User-authored text, kept verbatim including its line breaks. -->
                  <p>
                   1 100维哪叫高维呀
                   <br/>
                   2 你这无非数据多了点 找online learning的
                   <br/>
                   3 非要batch 可以用 mahout
                  </p>
                 </div>
                 <!-- .bbp-reply-content -->
                </div>
                <!-- .reply -->
                <!-- Reply header for post 339622: post date, parent topic link and the reply permalink. -->
                <div class="bbp-reply-header" id="post-339622">
                 <div class="bbp-meta">
                  <span class="bbp-reply-post-date">
                   <!-- Machine-readable copy of the visible date; the page gives no time zone, so none is encoded. -->
                   <time datetime="2012-11-18T05:17">
                    2012年11月18日 上午5:17
                   </time>
                  </span>
                  <span class="bbp-header">
                   回复：
                   <a class="bbp-topic-permalink" href="http://cos.name/cn/topic/108617/">
                    请教个关于“过拟合”的问题
                   </a>
                  </span>
                  <a class="bbp-reply-permalink" href="http://cos.name/cn/topic/108617/#post-339622">
                   5 楼
                  </a>
                  <span class="bbp-admin-links">
                  </span>
                 </div>
                 <!-- .bbp-meta -->
                </div>
                <!-- #post-339622 -->
                <!-- Reply body for post 339622: author column (avatar, name, role) followed by the message text. -->
                <div class="odd bbp-parent-forum-1006 bbp-parent-topic-108617 bbp-reply-position-5 user-id-369906 post-339622 reply type-reply status-publish hentry">
                 <div class="bbp-reply-author">
                  <a class="bbp-author-avatar" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   <!-- The image is the only content of this link, so it needs alt text to give the link a name. -->
                   <img alt="刘 成昊的头像" src="http://sdn.geekzu.org/avatar/37d5b3199150afc2ff389bd4005a50fc?s=80&amp;d=monsterid&amp;r=g"/>
                  </a>
                  <br/>
                  <a class="bbp-author-name" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   刘 成昊
                  </a>
                  <br/>
                  <div class="bbp-author-role">
                   普通会员
                  </div>
                 </div>
                 <!-- .bbp-reply-author -->
                 <div class="bbp-reply-content">
                  <p>
                   lz  可以贴上论文的原话么
                  </p>
                 </div>
                 <!-- .bbp-reply-content -->
                </div>
                <!-- .reply -->
                <!-- Reply header for post 339575: post date, parent topic link and the reply permalink. -->
                <div class="bbp-reply-header" id="post-339575">
                 <div class="bbp-meta">
                  <span class="bbp-reply-post-date">
                   <!-- Machine-readable copy of the visible date (下午3:11 is 15:11); no time zone is given on the page. -->
                   <time datetime="2012-11-16T15:11">
                    2012年11月16日 下午3:11
                   </time>
                  </span>
                  <span class="bbp-header">
                   回复：
                   <a class="bbp-topic-permalink" href="http://cos.name/cn/topic/108617/">
                    请教个关于“过拟合”的问题
                   </a>
                  </span>
                  <a class="bbp-reply-permalink" href="http://cos.name/cn/topic/108617/#post-339575">
                   2 楼
                  </a>
                  <span class="bbp-admin-links">
                  </span>
                 </div>
                 <!-- .bbp-meta -->
                </div>
                <!-- #post-339575 -->
                <!-- Reply body for post 339575: author column (avatar, name, role) followed by the message text. -->
                <div class="even bbp-parent-forum-1006 bbp-parent-topic-108617 bbp-reply-position-2 user-id-369906 post-339575 reply type-reply status-publish hentry">
                 <div class="bbp-reply-author">
                  <a class="bbp-author-avatar" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   <!-- The image is the only content of this link, so it needs alt text to give the link a name. -->
                   <img alt="刘 成昊的头像" src="http://sdn.geekzu.org/avatar/37d5b3199150afc2ff389bd4005a50fc?s=80&amp;d=monsterid&amp;r=g"/>
                  </a>
                  <br/>
                  <a class="bbp-author-name" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   刘 成昊
                  </a>
                  <br/>
                  <div class="bbp-author-role">
                   普通会员
                  </div>
                 </div>
                 <!-- .bbp-reply-author -->
                 <div class="bbp-reply-content">
                  <p>
                   我最多用过十万树 还是过拟合，OOB什么的不准啊，最后还是用CV，于是不相信那些理论了
                  </p>
                 </div>
                 <!-- .bbp-reply-content -->
                </div>
                <!-- .reply -->
                <!-- Reply header for post 339574: post date, parent topic link and the reply permalink. -->
                <div class="bbp-reply-header" id="post-339574">
                 <div class="bbp-meta">
                  <span class="bbp-reply-post-date">
                   <!-- Machine-readable copy of the visible date (下午3:09 is 15:09); no time zone is given on the page. -->
                   <time datetime="2012-11-16T15:09">
                    2012年11月16日 下午3:09
                   </time>
                  </span>
                  <span class="bbp-header">
                   回复：
                   <a class="bbp-topic-permalink" href="http://cos.name/cn/topic/108618/">
                    random forest树之间的相关性
                   </a>
                  </span>
                  <a class="bbp-reply-permalink" href="http://cos.name/cn/topic/108618/#post-339574">
                   2 楼
                  </a>
                  <span class="bbp-admin-links">
                  </span>
                 </div>
                 <!-- .bbp-meta -->
                </div>
                <!-- #post-339574 -->
                <!-- Reply body for post 339574: author column (avatar, name, role) followed by the message text. -->
                <div class="odd bbp-parent-forum-1006 bbp-parent-topic-108618 bbp-reply-position-2 user-id-369906 post-339574 reply type-reply status-publish hentry">
                 <div class="bbp-reply-author">
                  <a class="bbp-author-avatar" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   <!-- The image is the only content of this link, so it needs alt text to give the link a name. -->
                   <img alt="刘 成昊的头像" src="http://sdn.geekzu.org/avatar/37d5b3199150afc2ff389bd4005a50fc?s=80&amp;d=monsterid&amp;r=g"/>
                  </a>
                  <br/>
                  <a class="bbp-author-name" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   刘 成昊
                  </a>
                  <br/>
                  <div class="bbp-author-role">
                   普通会员
                  </div>
                 </div>
                 <!-- .bbp-reply-author -->
                 <div class="bbp-reply-content">
                  <p>
                   训练出树的数据有重叠
                  </p>
                 </div>
                 <!-- .bbp-reply-content -->
                </div>
                <!-- .reply -->
                <!-- Reply header for post 339562: post date, parent topic link and the reply permalink. -->
                <div class="bbp-reply-header" id="post-339562">
                 <div class="bbp-meta">
                  <span class="bbp-reply-post-date">
                   <!-- Machine-readable copy of the visible date; the page gives no time zone, so none is encoded. -->
                   <time datetime="2012-11-16T05:55">
                    2012年11月16日 上午5:55
                   </time>
                  </span>
                  <span class="bbp-header">
                   回复：
                   <a class="bbp-topic-permalink" href="http://cos.name/cn/topic/108613/">
                    问个关于bagging的问题。
                   </a>
                  </span>
                  <a class="bbp-reply-permalink" href="http://cos.name/cn/topic/108613/#post-339562">
                   4 楼
                  </a>
                  <span class="bbp-admin-links">
                  </span>
                 </div>
                 <!-- .bbp-meta -->
                </div>
                <!-- #post-339562 -->
                <!-- Reply body for post 339562: author column (avatar, name, role) followed by the message text. -->
                <div class="even bbp-parent-forum-1006 bbp-parent-topic-108613 bbp-reply-position-4 user-id-369906 post-339562 reply type-reply status-publish hentry">
                 <div class="bbp-reply-author">
                  <a class="bbp-author-avatar" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   <!-- The image is the only content of this link, so it needs alt text to give the link a name. -->
                   <img alt="刘 成昊的头像" src="http://sdn.geekzu.org/avatar/37d5b3199150afc2ff389bd4005a50fc?s=80&amp;d=monsterid&amp;r=g"/>
                  </a>
                  <br/>
                  <a class="bbp-author-name" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   刘 成昊
                  </a>
                  <br/>
                  <div class="bbp-author-role">
                   普通会员
                  </div>
                 </div>
                 <!-- .bbp-reply-author -->
                 <div class="bbp-reply-content">
                  <p>
                   因为树特别容易overfitting,其他有类似特征的model用bagging往往也能提升性能
                  </p>
                 </div>
                 <!-- .bbp-reply-content -->
                </div>
                <!-- .reply -->
                <!-- Reply header for post 339509: post date, parent topic link and the reply permalink. -->
                <div class="bbp-reply-header" id="post-339509">
                 <div class="bbp-meta">
                  <span class="bbp-reply-post-date">
                   <!-- Machine-readable copy of the visible date (下午12:32 is 12:32); no time zone is given on the page. -->
                   <time datetime="2012-11-14T12:32">
                    2012年11月14日 下午12:32
                   </time>
                  </span>
                  <span class="bbp-header">
                   回复：
                   <a class="bbp-topic-permalink" href="http://cos.name/cn/topic/14107/">
                    我来闲话一下R的debug经验
                   </a>
                  </span>
                  <a class="bbp-reply-permalink" href="http://cos.name/cn/topic/14107/#post-339509">
                   9 楼
                  </a>
                  <span class="bbp-admin-links">
                  </span>
                 </div>
                 <!-- .bbp-meta -->
                </div>
                <!-- #post-339509 -->
                <!-- Reply body for post 339509: author column (avatar, name, role) followed by the message text. -->
                <div class="odd bbp-parent-forum-999 bbp-parent-topic-14107 bbp-reply-position-9 user-id-369906 post-339509 reply type-reply status-publish hentry">
                 <div class="bbp-reply-author">
                  <a class="bbp-author-avatar" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   <!-- The image is the only content of this link, so it needs alt text to give the link a name. -->
                   <img alt="刘 成昊的头像" src="http://sdn.geekzu.org/avatar/37d5b3199150afc2ff389bd4005a50fc?s=80&amp;d=monsterid&amp;r=g"/>
                  </a>
                  <br/>
                  <a class="bbp-author-name" href="http://cos.name/cn/profile/369906/" rel="nofollow" title="查看刘 成昊的档案">
                   刘 成昊
                  </a>
                  <br/>
                  <div class="bbp-author-role">
                   普通会员
                  </div>
                 </div>
                 <!-- .bbp-reply-author -->
                 <div class="bbp-reply-content">
                  <p>
                   我觉得老帖的质量明显高于现在
                  </p>
                 </div>
                 <!-- .bbp-reply-content -->
                </div>
                <!-- .reply -->
               </li>
               <!-- .bbp-body -->
               <!-- Footer row of the reply list: column labels ("作者" = author, "帖子" = posts) that reuse the reply-author / reply-content column classes. -->
               <li class="bbp-footer">
                <div class="bbp-reply-author">
                 作者
                </div>
                <div class="bbp-reply-content">
                 帖子
                </div>
                <!-- .bbp-reply-content -->
               </li>
               <!-- .bbp-footer -->
              </ul>
              <!-- #topic-0-replies -->
              <!-- Pagination for the reply list: a result-count summary followed by the page links. -->
              <div class="bbp-pagination">
               <div class="bbp-pagination-count">
                查看 15 个帖子 - 1 到 15（总计 102 个）
               </div>
               <div class="bbp-pagination-links">
                <!-- aria-current exposes the current page to assistive technology, not only through styling. -->
                <span aria-current="page" class="page-numbers current">
                 1
                </span>
                <a class="page-numbers" href="http://cos.name/cn/profile/369906/replies/page/2/">
                 2
                </a>
                <span class="page-numbers dots">
                 …
                </span>
                <a class="page-numbers" href="http://cos.name/cn/profile/369906/replies/page/7/">
                 7
                </a>
                <!-- The visible text is only an arrow glyph, so an explicit label gives the link a meaningful name. -->
                <a aria-label="下一页" class="next page-numbers" href="http://cos.name/cn/profile/369906/replies/page/2/">
                 →
                </a>
               </div>
              </div>
             </div>
            </div>
            <!-- #bbp-user-replies-created -->
           </div>
          </div>
         </div>
        </div>
        <!-- .entry-content -->
        <footer class="entry-meta">
        </footer>
        <!-- .entry-meta -->
       </article>
       <!-- #post -->
      </div>
      <!-- #content -->
     </div>
     <!-- #primary -->
     <div class="sidebar-container" id="tertiary" role="complementary">
      <div class="sidebar-inner">
       <div class="widget-area">
        <!-- Login widget: posts to the forum's wp-login.php and links to registration / password recovery. -->
        <aside class="widget bbp_widget_login" id="bbp_login_widget-2">
         <h3 class="widget-title">
          登录
         </h3>
         <form action="http://cos.name/cn/wp-login.php" class="bbp-login-form" method="post">
          <fieldset>
           <legend>
            登录
           </legend>
           <!-- NOTE(review): the three labels below point at ids (user_login, user_pass, rememberme)
                that have no matching control in this copy of the page; confirm whether the inputs
                were stripped upstream before relying on this form. -->
           <div class="bbp-username">
            <label for="user_login">
             用户名:
            </label>
           </div>
           <div class="bbp-password">
            <label for="user_pass">
             密码:
            </label>
           </div>
           <div class="bbp-remember-me">
            <label for="rememberme">
             记住用户名
            </label>
           </div>
           <div class="bbp-submit-wrapper">
            <!-- The positive tabindex was removed so the button keeps its natural place in the tab order. -->
            <button class="button submit user-submit" id="user-submit" name="user-submit" type="submit">
             登录
            </button>
           </div>
           <div class="bbp-login-links">
            <a class="bbp-register-link" href="http://cos.name/cn/wp-login.php?action=register" title="注册">
             注册
            </a>
            <a class="bbp-lostpass-link" href="http://cos.name/cn/wp-login.php?action=lostpassword" title="忘记密码">
             忘记密码
            </a>
           </div>
          </fieldset>
         </form>
        </aside>
        <!-- Search widget ("搜索"): two search forms, one targeting Google and one identified as Baidu by its id. -->
        <aside class="widget widget_text" id="text-7">
         <h3 class="widget-title">
          搜索
         </h3>
         <div class="textwidget">
          <!-- Submits via GET to Google; the inline handler calls Gsitesearch(), which is not defined in the visible part of the file. -->
          <!-- NOTE(review): this form contains no input controls here; presumably they were stripped from this copy (confirm). -->
          <form action="http://www.google.com/search" id="bbp-search-form" method="get" onsubmit="Gsitesearch(this)" role="search">
           <div>
           </div>
          </form>
          <!-- No action or method is set; submission relies on the inline handler g(), which is also not defined in the visible part of the file. -->
          <form id="bbp-search-form-baidu" onsubmit="g(this)" role="search">
           <div>
           </div>
          </form>
         </div>
        </aside>
        <!-- "新鲜事" widget: quick links to the latest topics, the most popular topics, and topics without replies. -->
        <aside class="widget widget_text" id="text-2">
         <h3 class="widget-title">
          新鲜事
         </h3>
         <div class="textwidget">
          <ul>
           <li>
            <a href="http://cos.name/cn/topics/">
             最新帖子
            </a>
           </li>
           <li>
            <a href="http://cos.name/cn/view/popular/">
             最热门主题
            </a>
           </li>
           <li>
            <a href="http://cos.name/cn/view/no-replies/">
             消灭零回复
            </a>
           </li>
          </ul>
         </div>
        </aside>
        <!-- RSS widget: feed links for all topics and all posts, each preceded by an RSS icon. -->
        <aside class="widget widget_text" id="text-3">
         <h3 class="widget-title">
          RSS订阅
         </h3>
         <div class="textwidget">
          <ul>
           <li>
            <!-- Decorative icon: the adjacent link text already names the feed, so the alt text is empty. -->
            <img alt="" src="http://cos.name/wp-includes/images/rss.png"/>
            <a href="http://cos.name/cn/topics/feed/">
             所有主题
            </a>
           </li>
           <li>
            <!-- Decorative icon: the adjacent link text already names the feed, so the alt text is empty. -->
            <img alt="" src="http://cos.name/wp-includes/images/rss.png"/>
            <a href="http://cos.name/cn/forums/feed/">
             所有帖子
            </a>
           </li>
          </ul>
         </div>
        </aside>
       </div>
       <!-- .widget-area -->
      </div>
      <!-- .sidebar-inner -->
     </div>
     <!-- #tertiary -->
    </div>
    <!-- #main -->
    <!-- Site footer: copyright notice plus credits for the platform and the base theme. -->
    <footer class="site-footer" id="colophon" role="contentinfo">
     <div class="site-info">
      版权所有 © 2014 统计之都 | 由
      <!-- wordpress.org is served over HTTPS, so the credit links use the secure scheme directly. -->
      <a href="https://wordpress.org/">
       WordPress
      </a>
      构建 | 主题修改自
      <a href="https://wordpress.org/themes/twentythirteen">
       Twenty Thirteen
      </a>
     </div>
     <!-- .site-info -->
    </footer>
    <!-- #colophon -->
   </div>
   <!-- #page -->
  </div>
 </body>
</html>